import geopandas as pd
import contextily as ctx # Used for contextual basemaps
import matplotlib.pyplot as plt
from geocube.api.core import make_geocube # Used for rasterizing
import os
import shapely
import imageio
import numpy as np
from IPython.display import Image
plt.rcParams['figure.figsize'] = (20, 20)
os.listdir("input")
/usr/local/lib/python3.8/dist-packages/geopandas/_compat.py:106: UserWarning: The Shapely GEOS version (3.8.0-CAPI-1.13.1 ) is incompatible with the GEOS version PyGEOS was compiled with (3.9.0-CAPI-1.16.2). Conversions between both will be slow. warnings.warn(
['lds-nz-8m-digital-elevation-model-2012-GTiff-auckland-region.zip', 'statsnzpopulation-by-meshblock-2013-census-FGDB.zip', 'statsnz2018-census-electoral-population-meshblock-2020-FGDB.zip', 'statsnzregional-council-2021-clipped-generalised-FGDB.zip', 'lris-lcdb-v50-land-cover-database-version-50-mainland-new-zealand-FGDB.zip']
First, read regional council bounds. This geometry will be used to clip NZ-wide datasets to just the region of interest, Auckland
%%time
REGC = pd.read_file("input/statsnzregional-council-2021-clipped-generalised-FGDB.zip!regional-council-2021-clipped-generalised.gdb")
AKL = REGC[REGC.REGC2021_V1_00_NAME == "Auckland Region"].copy()
# Filter out islands
AKL["geometry"] = max(AKL.geometry.explode(), key=lambda a: a.area)
# Coordinate reference system (projection)
print(AKL.crs)
# Simplify geometry to speed up clip operations
AKL = AKL.simplify(1000).buffer(1000)
ax = AKL.to_crs(epsg=3857).boundary.plot()
ax.set_title("Auckland Region clip extent")
ctx.add_basemap(ax)
epsg:2193 CPU times: user 1.87 s, sys: 896 ms, total: 2.77 s Wall time: 14.7 s
Load the LRIS Land Cover Database (downloaded in GDB format from https://lris.scinfo.org.nz/layer/104400-lcdb-v50-land-cover-database-version-50-mainland-new-zealand/)
%%time
df = pd.read_file("zip://input/lris-lcdb-v50-land-cover-database-version-50-mainland-new-zealand-FGDB.zip!lcdb-v50-land-cover-database-version-50-mainland-new-zealand.gdb")
CPU times: user 1min 45s, sys: 1.95 s, total: 1min 47s Wall time: 1min 47s
print(df.columns)
print(df.crs)
display(df.sample(5))
Index(['Name_2018', 'Name_2012', 'Name_2008', 'Name_2001', 'Name_1996',
'Class_2018', 'Class_2012', 'Class_2008', 'Class_2001', 'Class_1996',
'Wetland_18', 'Wetland_12', 'Wetland_08', 'Wetland_01', 'Wetland_96',
'Onshore_18', 'Onshore_12', 'Onshore_08', 'Onshore_01', 'Onshore_96',
'EditAuthor', 'EditDate', 'LCDB_UID', 'geometry'],
dtype='object')
epsg:2193
| Name_2018 | Name_2012 | Name_2008 | Name_2001 | Name_1996 | Class_2018 | Class_2012 | Class_2008 | Class_2001 | Class_1996 | ... | Wetland_96 | Onshore_18 | Onshore_12 | Onshore_08 | Onshore_01 | Onshore_96 | EditAuthor | EditDate | LCDB_UID | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 442860 | Exotic Forest | Exotic Forest | Exotic Forest | Exotic Forest | Exotic Forest | 71 | 71 | 71 | 71 | 71 | ... | no | yes | yes | yes | yes | yes | Landcare Research | 2014-06-30T00:00:00 | lcdb1000209174 | MULTIPOLYGON (((1870798.579 5723740.899, 18708... |
| 284667 | Landslide | Landslide | Landslide | Landslide | Landslide | 12 | 12 | 12 | 12 | 12 | ... | no | yes | yes | yes | yes | yes | Terralink | 2004-06-30T00:00:00 | lcdb1000010713 | MULTIPOLYGON (((1837870.064 5585804.793, 18378... |
| 61157 | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | 45 | 45 | 45 | 45 | 45 | ... | yes | yes | yes | yes | yes | yes | Landcare Research | 2019-12-01T00:00:00 | lcdb2000223512 | MULTIPOLYGON (((1229977.484 4918634.298, 12299... |
| 359025 | Indigenous Forest | Indigenous Forest | Indigenous Forest | Indigenous Forest | Indigenous Forest | 69 | 69 | 69 | 69 | 69 | ... | no | yes | yes | yes | yes | yes | Terralink | 2004-06-30T00:00:00 | lcdb2000194432 | MULTIPOLYGON (((1590765.692 5465294.194, 15907... |
| 236197 | Manuka and/or Kanuka | Manuka and/or Kanuka | Manuka and/or Kanuka | High Producing Exotic Grassland | High Producing Exotic Grassland | 52 | 52 | 52 | 40 | 40 | ... | no | yes | yes | yes | yes | yes | Landcare Research | 2011-06-30T00:00:00 | lcdb1000044114 | MULTIPOLYGON (((1777764.835 5468995.836, 17777... |
5 rows × 24 columns
%%time
df = pd.clip(df, AKL)
CPU times: user 45.7 s, sys: 0 ns, total: 45.7 s Wall time: 45.7 s
df.sample(5)
| Name_2018 | Name_2012 | Name_2008 | Name_2001 | Name_1996 | Class_2018 | Class_2012 | Class_2008 | Class_2001 | Class_1996 | ... | Wetland_96 | Onshore_18 | Onshore_12 | Onshore_08 | Onshore_01 | Onshore_96 | EditAuthor | EditDate | LCDB_UID | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 13273 | Exotic Forest | Exotic Forest | Exotic Forest | Exotic Forest | Exotic Forest | 71 | 71 | 71 | 71 | 71 | ... | no | yes | yes | yes | yes | yes | Terralink | 2004-06-30T00:00:00 | lcdb1000217712 | POLYGON ((1779221.876 5900779.701, 1779215.508... |
| 240237 | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | Herbaceous Freshwater Vegetation | 45 | 45 | 45 | 45 | 45 | ... | yes | yes | yes | yes | yes | yes | Regional Council | 2014-06-30T00:00:00 | lcdb1000407321 | POLYGON ((1714415.346 5953973.742, 1714397.707... |
| 294964 | Manuka and/or Kanuka | Manuka and/or Kanuka | Manuka and/or Kanuka | Manuka and/or Kanuka | Manuka and/or Kanuka | 52 | 52 | 52 | 52 | 52 | ... | no | yes | yes | yes | yes | yes | Terralink | 2004-06-30T00:00:00 | lcdb1000094591 | POLYGON ((1759394.386 5945791.678, 1759378.236... |
| 240078 | High Producing Exotic Grassland | High Producing Exotic Grassland | Exotic Forest | Exotic Forest | Exotic Forest | 40 | 40 | 71 | 71 | 71 | ... | no | yes | yes | yes | yes | yes | Landcare Research | 2019-12-01T00:00:00 | lcdb1000407310 | POLYGON ((1761208.096 5887484.787, 1761205.207... |
| 244441 | Broadleaved Indigenous Hardwoods | Broadleaved Indigenous Hardwoods | Broadleaved Indigenous Hardwoods | Broadleaved Indigenous Hardwoods | Broadleaved Indigenous Hardwoods | 54 | 54 | 54 | 54 | 54 | ... | no | yes | yes | yes | yes | yes | Terralink | 2004-06-30T00:00:00 | lcdb1000120645 | POLYGON ((1784966.148 5886235.716, 1784931.191... |
5 rows × 24 columns
df.Name_2018.value_counts()
Exotic Forest 3981 Indigenous Forest 3673 Manuka and/or Kanuka 2282 Broadleaved Indigenous Hardwoods 1788 Built-up Area (settlement) 1350 High Producing Exotic Grassland 1326 Mangrove 1151 Urban Parkland/Open Space 1099 Estuarine Open Water 441 Orchard, Vineyard or Other Perennial Crop 436 Short-rotation Cropland 362 Lake or Pond 326 Herbaceous Saline Vegetation 303 Low Producing Grassland 291 Gorse and/or Broom 287 Forest - Harvested 266 Sand or Gravel 252 Deciduous Hardwoods 201 Surface Mine or Dump 132 Mixed Exotic Shrubland 120 Herbaceous Freshwater Vegetation 118 Transport Infrastructure 107 River 15 Gravel or Rock 9 Flaxland 9 Fernland 2 Matagouri or Grey Scrub 1 Name: Name_2018, dtype: int64
These classes are far too detailed - simplify to just Urban, Vegetation, Water, Other
def simplify_classes(code):
if code in [1, 2, 5]:
return 1, "Urban"
elif code in [68,69,71]:
return 2, "Vegetation"
elif code in [0,20,21,22,45,46]:
return 3, "Water"
else:
return 4, "Other"
summary = []
years = [1996, 2001, 2008, 2012, 2018]
for year in years:
print(year)
class_year = f"Class_{year}"
df[class_year + "_simplified_code"] = df[class_year].apply(lambda c: simplify_classes(c)[0])
df[class_year + "_simplified_name"] = df[class_year].apply(lambda c: simplify_classes(c)[1])
summary.append(df[class_year + "_simplified_name"].value_counts())
1996 2001 2008 2012 2018
pd.GeoDataFrame(summary).plot.area()
<AxesSubplot:>
%%capture
# %%capture suppresses output
ims = []
years = [1996, 2001, 2008, 2012, 2018]
for year in years:
ax = df.plot(column=f'Class_{year}_simplified_name', legend=True)
ax.set_title(year)
ax.figure.tight_layout()
canvas = ax.figure.canvas
canvas.draw() # draw the canvas, cache the renderer
image = np.frombuffer(canvas.tostring_rgb(), dtype='uint8')
image = image.reshape(canvas.get_width_height()[::-1] + (3,))
ims.append(image)
imageio.mimsave("land_use.gif", ims, fps=1)
with open('land_use.gif','rb') as file:
display(Image(file.read()))
cols = [f"Class_{year}_simplified_code" for year in years]
cols
['Class_1996_simplified_code', 'Class_2001_simplified_code', 'Class_2008_simplified_code', 'Class_2012_simplified_code', 'Class_2018_simplified_code']
%%time
geocube = make_geocube(
vector_data=df,
output_crs="epsg:2193",
measurements=cols,
resolution=(-100, 100)
)
geocube
CPU times: user 20.1 s, sys: 9.77 ms, total: 20.1 s Wall time: 20.1 s
<xarray.Dataset>
Dimensions: (x: 1001, y: 1320)
Coordinates:
* y (y) float64 6.002e+06 6.002e+06 ... 5.87e+06
* x (x) float64 1.704e+06 1.704e+06 ... 1.804e+06
spatial_ref int64 0
Data variables:
Class_1996_simplified_code (y, x) float64 nan nan nan nan ... nan nan nan
Class_2001_simplified_code (y, x) float64 nan nan nan nan ... nan nan nan
Class_2008_simplified_code (y, x) float64 nan nan nan nan ... nan nan nan
Class_2012_simplified_code (y, x) float64 nan nan nan nan ... nan nan nan
Class_2018_simplified_code (y, x) float64 nan nan nan nan ... nan nan nan
Attributes:
grid_mapping: spatial_refarray([6002350., 6002250., 6002150., ..., 5870650., 5870550., 5870450.])
array([1703950., 1704050., 1704150., ..., 1803750., 1803850., 1803950.])
array(0)
array([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])array([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])array([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])array([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])array([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])geocube.Class_2018_simplified_code.plot()
<matplotlib.collections.QuadMesh at 0x7f8598fa4820>
for year in years:
print(year)
outfile = f"output/land_use_{year}.tif"
if not os.path.isfile(outfile):
geocube[f"Class_{year}_simplified_code"].rio.to_raster(outfile)
1996 2001 2008 2012 2018
With land use taken care of, let's shift focus to population density
%%time
pop2013 = pd.read_file("input/statsnzpopulation-by-meshblock-2013-census-FGDB.zip!population-by-meshblock-2013-census.gdb")
CPU times: user 9.15 s, sys: 10.3 ms, total: 9.16 s Wall time: 9.14 s
%%time
pop2013 = pd.clip(pop2013, AKL)
CPU times: user 18.7 s, sys: 0 ns, total: 18.7 s Wall time: 18.7 s
display(pop2013.sample(5))
| Meshblock | MeshblockNumber | Population_Count_Usual_Resident_2013 | Population_Count_Census_Night_2013 | geometry | |
|---|---|---|---|---|---|
| 12535 | MB 0235501 | 0235501 | 285 | 276 | POLYGON ((1743996.780 5919716.323, 1743995.251... |
| 1233 | MB 0181618 | 0181618 | 228 | 231 | POLYGON ((1749092.530 5929429.139, 1749093.091... |
| 1361 | MB 0224120 | 0224120 | 222 | 225 | POLYGON ((1745938.819 5924548.364, 1745946.335... |
| 1132 | MB 0174607 | 0174607 | 201 | 201 | POLYGON ((1757145.453 5943600.323, 1757075.593... |
| 11596 | MB 0182026 | 0182026 | 81 | 78 | POLYGON ((1753929.247 5930220.327, 1753937.854... |
#pop2013.Population_Count_Usual_Resident_2013.replace(0, np.nan, inplace=True)
pop2013.Population_Count_Usual_Resident_2013.plot(kind="hist", bins=200)
<AxesSubplot:ylabel='Frequency'>
%%time
pop2013_cube = make_geocube(
vector_data=pop2013,
measurements=["Population_Count_Usual_Resident_2013"],
like=geocube # Ensures dimensions match
)
pop2013_cube
CPU times: user 2.73 s, sys: 0 ns, total: 2.73 s Wall time: 2.73 s
<xarray.Dataset>
Dimensions: (x: 1001, y: 1320)
Coordinates:
* y (y) float64 6.002e+06 ... 5.87e+06
* x (x) float64 1.704e+06 ... 1.804e+06
spatial_ref int64 0
Data variables:
Population_Count_Usual_Resident_2013 (y, x) float64 nan nan nan ... nan nan
Attributes:
grid_mapping: spatial_refarray([6002350., 6002250., 6002150., ..., 5870650., 5870550., 5870450.])
array([1703950., 1704050., 1704150., ..., 1803750., 1803850., 1803950.])
array(0)
array([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])pop2013_cube.Population_Count_Usual_Resident_2013.plot()
outfile = "output/pop2013.tif"
if not os.path.isfile(outfile):
pop2013_cube.Population_Count_Usual_Resident_2013.rio.to_raster()
%%time
pop2018 = pd.read_file("input/statsnz2018-census-electoral-population-meshblock-2020-FGDB.zip!2018-census-electoral-population-meshblock-2020.gdb")
CPU times: user 9.93 s, sys: 60 ms, total: 9.99 s Wall time: 9.97 s
%%time
pop2018 = pd.clip(pop2018, AKL)
CPU times: user 21.3 s, sys: 0 ns, total: 21.3 s Wall time: 21.3 s
display(pop2018.sample(5))
| MB2020_V2_00 | General_Electoral_Population | Maori_Electoral_Population | GED2020_V1_00 | GED2020_V1_00_NAME | GED2020_V1_00_NAME_ASCII | MED2020_V1_00 | MED2020_V1_00_NAME | MED2020_V1_00_NAME_ASCII | LAND_AREA_SQ_KM | AREA_SQ_KM | Shape_Length | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16743 | 0585101 | 216 | 6 | 025 | Mt Roskill | Mt Roskill | 3 | Tāmaki Makaurau | Tamaki Makaurau | 0.051435 | 0.051435 | 1023.415916 | POLYGON ((1754048.851 5913261.309, 1754035.547... |
| 12599 | 0189104 | 24 | -999 | 031 | Northcote | Northcote | 5 | Te Tai Tokerau | Te Tai Tokerau | 0.013901 | 0.013901 | 482.741419 | POLYGON ((1752985.169 5925921.081, 1753032.190... |
| 16137 | 0518000 | 186 | 15 | 024 | Mt Albert | Mt Albert | 3 | Tāmaki Makaurau | Tamaki Makaurau | 0.050050 | 0.050050 | 1274.284146 | POLYGON ((1753484.633 5915232.683, 1753489.034... |
| 15197 | 0423300 | 114 | -999 | 001 | Auckland Central | Auckland Central | 3 | Tāmaki Makaurau | Tamaki Makaurau | 0.019930 | 0.019930 | 619.928826 | POLYGON ((1754796.745 5921269.491, 1754799.336... |
| 11765 | 0157201 | 45 | -999 | 010 | East Coast Bays | East Coast Bays | 5 | Te Tai Tokerau | Te Tai Tokerau | 0.014225 | 0.014225 | 532.261895 | POLYGON ((1755797.125 5934296.949, 1755797.178... |
pop2018.General_Electoral_Population.replace(-999, 0, inplace=True)
pop2018.General_Electoral_Population.plot(kind="hist", bins=200)
<AxesSubplot:ylabel='Frequency'>
%%time
pop2018_cube = make_geocube(
vector_data=pop2018,
measurements=["General_Electoral_Population"],
like=geocube # Ensures dimensions match
)
pop2018_cube
CPU times: user 2.59 s, sys: 10.1 ms, total: 2.6 s Wall time: 2.6 s
<xarray.Dataset>
Dimensions: (x: 1001, y: 1320)
Coordinates:
* y (y) float64 6.002e+06 6.002e+06 ... 5.87e+06
* x (x) float64 1.704e+06 1.704e+06 ... 1.804e+06
spatial_ref int64 0
Data variables:
General_Electoral_Population (y, x) float64 nan nan nan nan ... nan nan nan
Attributes:
grid_mapping: spatial_refarray([6002350., 6002250., 6002150., ..., 5870650., 5870550., 5870450.])
array([1703950., 1704050., 1704150., ..., 1803750., 1803850., 1803950.])
array(0)
array([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])pop2018_cube.General_Electoral_Population.plot()
outfile = "output/pop2018.tif"
if not os.path.isfile(outfile):
pop2018_cube.General_Electoral_Population.rio.to_raster(outfile)
!ls -Ggh output
total 71M -rw-r--r-- 1 11M Apr 15 10:25 land_use_1996.tif -rw-r--r-- 1 11M Apr 15 10:25 land_use_2001.tif -rw-r--r-- 1 11M Apr 15 10:25 land_use_2008.tif -rw-r--r-- 1 11M Apr 15 10:25 land_use_2012.tif -rw-r--r-- 1 11M Apr 15 10:25 land_use_2018.tif -rw-r--r-- 1 11M Apr 15 12:18 pop2013.tif -rw-r--r-- 1 11M Apr 15 12:27 pop2018.tif